Show Code 💡
#|message: false
#|warning: false
library(readr)
library(stringr)
library(ggplot2)
library(tidyverse)
library(lubridate)
library(scales)
library(DT)Shubh Goyal
Invalid Date
This report analyzes NYC payroll data to evaluate potential policies aimed at reducing taxpayer expenses. We examine three policies:
1. Capping salaries at the mayoral level
2. Increasing staffing to reduce overtime
3. A custom policy proposal
The analysis is based on historical payroll data, and all calculations are made using standardized assumptions.
payroll_data <- payroll_data %>%
mutate(
agency_name = str_to_title(agency_name),
last_name = str_to_title(last_name),
first_name = str_to_title(first_name),
work_location_borough = str_to_title(work_location_borough),
title_description = str_to_title(title_description),
leave_status = str_to_title(leave_status_as_of_june_30)
)Rows: 6,225,611
Columns: 19
$ fiscal_year <dbl> 2024, 2024, 2024, 2024, 2024, 2024, 2024, 2…
$ payroll_number <dbl> 67, 67, 67, 67, 67, 67, 67, 67, 67, 67, 67,…
$ agency_name <chr> "Admin For Children's Svcs", "Admin For Chi…
$ last_name <chr> "Faye Fall", "Kilgore", "Wisdom", "Miller",…
$ first_name <chr> "Sokhna", "Orlantha", "Cherise", "Moya-Gaye…
$ mid_init <chr> "M", "B", "M", "S", "M", "L", "O", NA, "N",…
$ agency_start_date <dttm> 2023-11-20, 2023-08-28, 2022-10-24, 2023-0…
$ work_location_borough <chr> "Bronx", "Brooklyn", "Manhattan", "Manhatta…
$ title_description <chr> "Child Protective Specialist", "Child Prote…
$ leave_status_as_of_june_30 <chr> "ACTIVE", "ACTIVE", "ON LEAVE", "ON LEAVE",…
$ base_salary <dbl> 62043, 62043, 43144, 62043, 60236, 62043, 6…
$ pay_basis <chr> "per Annum", "per Annum", "per Annum", "per…
$ regular_hours <dbl> 1050.00, 1470.00, 1251.50, 1400.75, 700.00,…
$ regular_gross_paid <dbl> 31267.96, 44660.96, 28649.20, 44515.43, 221…
$ ot_hours <dbl> 12.00, 99.75, 30.00, 44.75, 53.00, 146.00, …
$ total_ot_paid <dbl> 425.00, 3859.84, 802.42, 1476.98, 1933.33, …
$ total_other_pay <dbl> 78.04, 78.14, 78.26, 78.37, 78.47, 78.86, 7…
$ leave_status <chr> "Active", "Active", "On Leave", "On Leave",…
$ reg_hours <dbl> 1050.00, 1470.00, 1251.50, 1400.75, 700.00,…
mayor_data <- payroll_data %>%
filter(str_detect( first_name, "Eric") & str_detect( last_name , "Adams")) %>%
select( fiscal_year , title_description , agency_name , base_salary ) %>%
arrange( fiscal_year )
mayor_data %>%
mutate( base_salary = dollar( base_salary )) %>%
datatable(options = list( searching = FALSE, paging = FALSE, info = FALSE))payroll_data <- payroll_data %>%
mutate (total_compensation = case_when(
pay_basis == "per Annum" ~ base_salary,
pay_basis == "per Hour" ~ base_salary * regular_hours + (base_salary * 1.5 * ot_hours),
pay_basis == "per Day" ~ base_salary * (regular_hours / 7.5),TRUE ~ base_salary ) )
datatable(payroll_data %>%
select(first_name, last_name, agency_name, title_description, pay_basis, base_salary, regular_hours, ot_hours, total_compensation) %>%
arrange(desc(total_compensation)) %>%
slice_head(n = 10),options = list(scrollX = TRUE))This section answers critical payroll-related questions using NYC Payroll Data.
# A tibble: 1 × 3
title_description agency_name hourly_rate
<chr> <chr> <dbl>
1 Chair Nyc Housing Authority 207.
highest_earning_employee <- payroll_data %>%
mutate(total_compensation = base_salary + total_ot_paid + total_other_pay) %>%
arrange(desc(total_compensation)) %>%
select(fiscal_year, first_name, last_name, title_description, agency_name, total_compensation) %>%
slice(1)
print(highest_earning_employee)# A tibble: 1 × 6
fiscal_year first_name last_name title_description agency_name
<dbl> <chr> <chr> <chr> <chr>
1 2024 Mark Tettonis Chief Marine Engineer Department Of Transpor…
# ℹ 1 more variable: total_compensation <dbl>
# A tibble: 1 × 6
fiscal_year first_name last_name title_description agency_name ot_hours
<dbl> <chr> <chr> <chr> <chr> <dbl>
1 2022 James Internicola Correction Officer Department Of … 3693.
# A tibble: 1 × 2
agency_name avg_total_pay
<chr> <dbl>
1 Office Of Racial Equity 153102.
# A tibble: 11 × 3
# Groups: fiscal_year [11]
fiscal_year agency_name employee_count
<dbl> <chr> <int>
1 2014 Dept Of Ed Pedagogical 100589
2 2015 Dept Of Ed Pedagogical 111857
3 2016 Dept Of Ed Pedagogical 106263
4 2017 Dept Of Ed Pedagogical 104629
5 2018 Dept Of Ed Pedagogical 107956
6 2019 Dept Of Ed Pedagogical 112067
7 2020 Dept Of Ed Pedagogical 114999
8 2021 Dept Of Ed Pedagogical 113523
9 2022 Dept Of Ed Pedagogical 120453
10 2023 Dept Of Ed Pedagogical 106882
11 2024 Dept Of Ed Pedagogical 108209
# A tibble: 1 × 2
agency_name avg_overtime_ratio
<chr> <dbl>
1 Fire Department 0.135
# A tibble: 1 × 1
avg_salary
<dbl>
1 53730.
payroll_growth <- payroll_data %>%
group_by(fiscal_year) %>% summarize(total_payroll = sum(base_salary + total_ot_paid + total_other_pay, na.rm = TRUE)) %>%
arrange(fiscal_year) %>%
mutate(payroll_growth = (total_payroll / lag(total_payroll) - 1) * 100,
payroll_growth = paste0(round(payroll_growth, 2), "%"))
payroll_growth# A tibble: 11 × 3
fiscal_year total_payroll payroll_growth
<dbl> <dbl> <chr>
1 2014 22638474550. NA%
2 2015 25470285473. 12.51%
3 2016 26510880597. 4.09%
4 2017 27208527853. 2.63%
5 2018 27162686275. -0.17%
6 2019 29546368177. 8.78%
7 2020 32146285486. 8.8%
8 2021 31552393078. -1.85%
9 2022 34872407402. 10.52%
10 2023 33319364876. -4.45%
11 2024 34700020886. 4.14%
total_savings <- sum(high_salaries$total_compensation_emp - high_salaries$total_compensation_mayor, na.rm = TRUE)
datatable(
high_salaries %>%
select(fiscal_year, first_name, last_name, title_description, agency_name, total_compensation_emp, total_compensation_mayor) %>%
slice_head(n = 100),
options = list(scrollX = TRUE)
)overtime_reduction <- payroll_data %>%
group_by(agency_name, title_description) %>%
summarize( total_overtime_hours = sum(ot_hours, na.rm = TRUE), full_time_equivalent_needed = total_overtime_hours / 2000 ) %>% arrange(desc(total_overtime_hours))
datatable(overtime_reduction, options = list(scrollX = TRUE))overtime_savings <- payroll_data %>%
group_by(agency_name, title_description) %>%
summarize( overtime_cost = sum(1.5 * base_salary * ot_hours, na.rm = TRUE),
regular_cost = sum(base_salary * (ot_hours / 40), na.rm = TRUE),
potential_savings = overtime_cost - regular_cost ) %>%
arrange(desc(potential_savings))
datatable(overtime_savings, options = list(scrollX = TRUE))agency_savings <- overtime_savings %>%
group_by(agency_name) %>%
summarize( total_overtime_cost = sum(overtime_cost, na.rm = TRUE),
total_regular_cost = sum(regular_cost, na.rm = TRUE),
total_savings = sum(potential_savings, na.rm = TRUE) ) %>%
arrange(desc(total_savings))
datatable(agency_savings, options = list(scrollX = TRUE))Many NYC agencies rely heavily on overtime (OT) to compensate for staffing shortages, significantly inflating payroll costs. Instead of paying excessive overtime wages, the city could hire additional employees to reduce OT dependency.
Analysis Steps:
Find employees with excessive overtime hours (e.g., more than 500 OT hours per year).
high_overtime_employees <- payroll_data %>%
filter(ot_hours > 500) %>%
group_by(title_description, agency_name) %>%
summarize( avg_overtime_hours = mean(ot_hours, na.rm = TRUE),
total_overtime_pay = sum(total_ot_paid, na.rm = TRUE), num_high_overtime_employees = n(),
avg_base_salary = mean(base_salary, na.rm = TRUE) ) %>%
arrange(desc(total_overtime_pay))
datatable(high_overtime_employees, options = list(scrollX = TRUE))Assuming a standard 2,000-hour work year
estimated_new_hires <- high_overtime_employees %>%
mutate( equivalent_full_time_positions = round(avg_overtime_hours / 2000, 1), total_new_hire_salary = equivalent_full_time_positions * avg_base_salary ) %>%
select(agency_name, title_description, num_high_overtime_employees, equivalent_full_time_positions, total_overtime_pay, total_new_hire_salary)
datatable(estimated_new_hires, options = list(scrollX = TRUE))Compare the total overtime cost vs. the cost of hiring new employees
Find agencies where overtime reliance is highest
library(dplyr)
overtime_by_agency <- payroll_data %>%
group_by(agency_name) %>%
summarize(
total_overtime_pay = sum(total_ot_paid, na.rm = TRUE),
total_base_salary = sum(base_salary, na.rm = TRUE),
ot_to_salary_ratio = total_overtime_pay / total_base_salary # Corrected calculation
) %>%
arrange(desc(ot_to_salary_ratio))Each policy presents unique benefits and trade-offs. Based on financial impact and feasibility:
Best Policy Choice: Policy III (Strategic Overtime Hiring).
Secondary Option: Policy II (Broad Hiring to Cut Overtime).
Least Effective: Policy I (Salary Cap), due to talent retention concerns.
Implement Policy 3 with phased hiring & overtime monitoring ( Final Recommendation )